Here we pre-process the Malaria Genomics Project Pf3k project data for both IBD analysis within isoRelate and to construct a processed ground truth data set using the known mixture clones present in the data.
Up to this point the following steps have been applied to the VCF files obtained from Pf3k website.
For each chromosome VCF file we applied the following filters using vcftools:
After we have processed each chromosome VCF we concatenate them together.
set -e
vcfdir=/path/to/malaria_gen5/vcf_files
outdir=./raw_data/
tempdir=~/tmp/
# step 1, select only SNPs + VQSLOD > 0, regiontype = core
vcf_chrom=$(find $vcfdir -regex ".*\.*[0-9][0-9]_v3.*.vcf.gz" | sort -u)
for chr in $vcf_chrom
do
(stem=$(basename $chr .vcf.gz)
echo $stem
vcftools --gzvcf $chr \
--remove-indels \
--min-alleles 2 \
--max-alleles 2 \
--remove-filtered-all \
--recode \
--recode-INFO-all \
--stdout | bgzip -c > ${tempdir}${stem}.vcf.gz
tabix -p vcf ${tempdir}${stem}.vcf.gz) &
done
wait
filtered_vcf=$(find $tempdir -name "*.vcf.gz" | sort -u)
bcftools concat -Oz -o ${outdir}malaria_gen5_biallelic_snps.vcf.gz $filtered_vcf
Then we convert the VCF to the GDS format keeping only the tags of interest.
library(SeqArray)
vcf_file <- "raw_data/malaria_gen5_biallelic_snps.vcf.gz"
vcf_header <- seqVCF_Header(vcf_file)
# recode header format for AD from R to .
vcf_header$format$Number[vcf_header$ID == "AD"] <- "."
# info columns to keep
info.import <- c("AC", "AF", "AN", "BaseQRankSum", "ClippingRankSum",
"DP", "FS", "InbreedingCoeff", "MQ", "MQRankSum",
"QD", "ReadPosRankSum",
"SOR", "VQSLOD", "SNPEFF_AMINO_ACID_CHANGE",
"SNPEFF_CODON_CHANGE", "SNPEFF_EFFECT", "SNPEFF_EXON_ID",
"SNPEFF_FUNCTIONAL_CLASS", "SNPEFF_GENE_BIOTYPE",
"SNPEFF_GENE_NAME", "SNPEFF_IMPACT", "SNPEFF_TRANSCRIPT_ID")
# format columns to keep
fmt.info <- c("AD", "DP", "GQ", "GT", "PL", "PGT", "PID")
seqVCF2GDS(vcf_file, "processed_data/malaria_gen5.gds",
header = vcf_header,
info.import = info.import,
fmt.import = fmt.info)
We now are ready to use the GDS file for analysis in R.
suppressPackageStartupMessages(library(SeqArray))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(moimix))
# use readonly = FALSE to add sample annotations
mgen <- seqOpen("processed_data/malaria_gen5.gds", readonly = FALSE)
The final variant set consists of 1057870 SNVs segregating by chromosome as follows:
knitr::kable(data.frame(chr = seqGetData(mgen, "chromosome") )
%>% count(chr),
caption = "SNV counts by chromosome")
| chr | n |
|---|---|
| Pf3D7_01_v3 | 21475 |
| Pf3D7_02_v3 | 35168 |
| Pf3D7_03_v3 | 43199 |
| Pf3D7_04_v3 | 45051 |
| Pf3D7_05_v3 | 61349 |
| Pf3D7_06_v3 | 55371 |
| Pf3D7_07_v3 | 57980 |
| Pf3D7_08_v3 | 58741 |
| Pf3D7_09_v3 | 73279 |
| Pf3D7_10_v3 | 72528 |
| Pf3D7_11_v3 | 98484 |
| Pf3D7_12_v3 | 99409 |
| Pf3D7_13_v3 | 151941 |
| Pf3D7_14_v3 | 183895 |
We have also processed the sample metadata to see how the samples segregate by geography. Samples with missing country variable correspond to the lab-strains from the complexity of infection and genetic crosses study.
# load metadata
library(readr)
mgen_meta <- read_rds("processed_data/mgen_clean.rds")
There are 2512 total field isolates and the dataset can be broadly grouped into three broader geographic locations:
west_africa <- c("The Gambia", "Ghana", "Guinea", "Mali", "Nigeria", "Senegal")
central_africa <- c("DR of the Congo", "Malawi")
se_asia <- c("Bangladesh", "Cambodia", "Laos", "Myanmar", "Thailand", "Vietnam")
mgen_meta$region <- NA
mgen_meta$region[mgen_meta$country %in% west_africa] <- "West Africa"
mgen_meta$region[mgen_meta$country %in% central_africa] <- "Central Africa"
mgen_meta$region[mgen_meta$country %in% se_asia] <- "SE Asia"
field_isolates <- mgen_meta %>% filter(!is.na(country))
knitr::kable(field_isolates %>% count(region, country, site))
| region | country | site | n |
|---|---|---|---|
| Central Africa | DR of the Congo | Kinshasa | 113 |
| Central Africa | Malawi | Chikwawa | 317 |
| Central Africa | Malawi | Zomba | 52 |
| SE Asia | Bangladesh | Ramu | 50 |
| SE Asia | Cambodia | Pailin | 84 |
| SE Asia | Cambodia | Preah Vihear | 104 |
| SE Asia | Cambodia | Pursat | 235 |
| SE Asia | Cambodia | Ratanakiri | 147 |
| SE Asia | Laos | Attapeu | 85 |
| SE Asia | Myanmar | Bago Division | 60 |
| SE Asia | Thailand | Mae Sot | 106 |
| SE Asia | Thailand | Ranong | 20 |
| SE Asia | Thailand | Sisakhet | 22 |
| SE Asia | Vietnam | Bu Dang | 1 |
| SE Asia | Vietnam | Bu Gia Map | 64 |
| SE Asia | Vietnam | Phuoc Long | 32 |
| West Africa | Ghana | Kassena | 549 |
| West Africa | Ghana | Kintampo | 68 |
| West Africa | Guinea | Nzerekore | 100 |
| West Africa | Mali | Bandiagara | 9 |
| West Africa | Mali | Faladje | 36 |
| West Africa | Mali | Kolle | 51 |
| West Africa | Nigeria | Ilorin | 5 |
| West Africa | Senegal | Thies | 133 |
| West Africa | Senegal | Velingara | 4 |
| West Africa | The Gambia | GM_Coastal | 65 |
# add metadata to GDS file
# first reorder according to sample.id
mgen_meta <- mgen_meta %>% arrange(sample)
if(cnt.gdsn(index.gdsn(mgen, "sample.annotation")) != 4) {
add.gdsn(index.gdsn(mgen, "sample.annotation"),
name = "acc", val = mgen_meta$acc)
add.gdsn(index.gdsn(mgen, "sample.annotation"),
name = "region", val =mgen_meta$region)
add.gdsn(index.gdsn(mgen, "sample.annotation"),
name = "country",val = mgen_meta$country)
add.gdsn(index.gdsn(mgen, "sample.annotation"),
name = "site", val = mgen_meta$site)
}
Prior to estimating the minor allele frequencies and obtaining a final set of SNPs and samples. We need to ensure we are confident in the SNP set we have obtained and that samples are of high-quality. Although these samples have been processed by there still might be samples that are poorly genotyped and SNVs that aren’t polymorphic globally or within the subpopulations.
We first look at the various quality metrics provided by the Haplotype Caller for variant annotated in the VCF files. Of particular interest is the QD MQRanksum, and SOR tags.
library(ggplot2)
library(grid)
theme_set(theme_bw())
gatk_df <- data.frame(variant.id = seqGetData(mgen, "variant.id"),
chr = seqGetData(mgen, "chromosome"),
mq = seqGetData(mgen, "annotation/info/MQ"),
mqrank = seqGetData(mgen, "annotation/info/MQRankSum"),
qd = seqGetData(mgen, "annotation/info/QD"),
sor = seqGetData(mgen, "annotation/info/SOR"))
grid.newpage()
pushViewport(viewport(layout = grid.layout(2, 2)))
print(ggplot(gatk_df, aes(x = mq)) + stat_ecdf(),
vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(ggplot(gatk_df, aes(x = mqrank)) + stat_ecdf(),
vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
## Warning: Removed 220140 rows containing non-finite values (stat_ecdf).
print(ggplot(gatk_df, aes(x = qd)) + stat_ecdf(),
vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
## Warning: Removed 96 rows containing non-finite values (stat_ecdf).
print(ggplot(gatk_df, aes(x = sor)) + stat_ecdf(),
vp = viewport(layout.pos.row = 2, layout.pos.col = 2))
We see that all variants are supported by high residual mean square mapping qualities over all samples. The distribution of MQRankSum scores (which is only computed at heterozygous sites) is centred at 0 with a fat right tail. For this tag only negative scores are useful since that indicates that the mapping quality for the alternate allele is low compared to the reference allele. The QD distribution is relatively symmetric and centred at around 20, most variants have relatively good ratios of average quality to average depth. The SOR tag is an odds ratio score to detect strand bias from a 2 x 2 contingency table using the strands and alt/ref allele counts as the rows and columns respectively.
We apply some hard filters first before looking at coverage distributions, * QD score > 15 * SOR < 1 * MQRankSum > -2
Next we look at the empirical cumulative distribution functions (ECDF) at:
# filter based on field samples
# filter based on qd > 20, sor > 1, mqrank > -2
v_ids <- (gatk_df %>% filter(qd > 20, sor > 1 | is.na(sor), mqrank > -2 | is.na(mqrank)))$variant.id
seqSetFilter(mgen, sample.id = field_isolates$sample, variant.id = v_ids)
## # of selected samples: 2,512
## # of selected variants: 146,997
# sample coverage
sample_5x <- perSiteCoverageBySNP(mgen, 5L)
sample_10x <- perSiteCoverageBySNP(mgen, 10L)
sample_20x <- perSiteCoverageBySNP(mgen, 20L)
cbbPalette <- c("#000000", "#E69F00", "#56B4E9")
plot(ecdf(sample_5x),
col = cbbPalette[1],
xlab = "Proportion of samples",
ylab = "Cumulative proportion of SNPs",
main = "")
lines(ecdf(sample_10x), col = cbbPalette[2])
lines(ecdf(sample_20x), col = cbbPalette[3])
legend('left', c('5x', '10x', '20x'), fill=cbbPalette, border=NA)
From the ECDF coverage plots we find that more than 90% of samples have at least 90% of theirs SNPs covered to 5 bases. Moreover, 90% of samples have at least 70% of their SNPs covered to 10 bases and 90% of samples have at least 60% of their SNPs covered by 20 bases.
# snp coverage by number of samples covered up to 5x, 10x, 10x
snp_5x <- perSiteCoverageBySample(mgen, 5L)
snp_10x <- perSiteCoverageBySample(mgen, 10L)
snp_20x <- perSiteCoverageBySample(mgen, 20L)
plot(ecdf(snp_20x),
col = cbbPalette[1], xlab = "Proportion of SNPs",
ylab = "Cumulative proportion of of samples",
main = "")
lines(ecdf(snp_10x), col = cbbPalette[2])
lines(ecdf(snp_20x), col = cbbPalette[3])
legend('left', c('5x', '10x', '20x'), fill=cbbPalette, border=NA)
Similarly, from the ECDF coverage plots we see that more than 90% of the SNPs are covered to at least 5 bases in 90% of samples. While 60% of SNPs are covered up to 10 bases in 90% of samples and 30% of SNP s are covered up to 20 bases in 90% of the samples.
Since for this analysis we are more interested in evaluating known variants (i.e. true positive variants) we applied some hard filtering to this dataset:
variant_filter <- v_ids[which(snp_10x > 0.9)]
samples_to_keep <- field_isolates$sample[which(sample_5x > 0.9)]
seqSetFilter(mgen, sample.id = samples_to_keep, variant.id = variant_filter)
## # of selected samples: 2,039
## # of selected variants: 94,106
This leaves us with 2039 samples and 671307 variants. The samples segregate by region as follows:
filtered_fi <- field_isolates %>% filter(sample %in% samples_to_keep)
knitr::kable(filtered_fi %>% count(region, country))
| region | country | n |
|---|---|---|
| Central Africa | DR of the Congo | 58 |
| Central Africa | Malawi | 357 |
| SE Asia | Bangladesh | 31 |
| SE Asia | Cambodia | 472 |
| SE Asia | Laos | 80 |
| SE Asia | Myanmar | 54 |
| SE Asia | Thailand | 132 |
| SE Asia | Vietnam | 93 |
| West Africa | Ghana | 512 |
| West Africa | Guinea | 100 |
| West Africa | Mali | 46 |
| West Africa | Nigeria | 4 |
| West Africa | Senegal | 48 |
| West Africa | The Gambia | 52 |
And the variants segregate by chromosome as follows:
coords <- getCoordinates(mgen)
knitr::kable(coords %>% count(chromosome))
| chromosome | n |
|---|---|
| Pf3D7_01_v3 | 1906 |
| Pf3D7_02_v3 | 3512 |
| Pf3D7_03_v3 | 3644 |
| Pf3D7_04_v3 | 3740 |
| Pf3D7_05_v3 | 5065 |
| Pf3D7_06_v3 | 5529 |
| Pf3D7_07_v3 | 5094 |
| Pf3D7_08_v3 | 5497 |
| Pf3D7_09_v3 | 4893 |
| Pf3D7_10_v3 | 6606 |
| Pf3D7_11_v3 | 9232 |
| Pf3D7_12_v3 | 9239 |
| Pf3D7_13_v3 | 14059 |
| Pf3D7_14_v3 | 16090 |
We save the global dataset as follows by extracting genotype calls (setting the heterozygous calls to missing at the moment) and the allele counts. # Global data set
ped_all <- extractPED(mgen, use.hets = FALSE)
acounts <- alleleCounts(mgen)
write_rds(ped_all, "processed_data/mgen_all_ped.rds")
write_rds(acounts, "processed_data/mgen_all_allele_counts.rds")
For the IBD analysis we require filters on the variants to ensure the SNPs have high enough MAFs and to annotate samples that are putatively clonal.
To do this we perform the following steps 1. Subset the calls by geographic region defined above. 2. Estimate the \(F_{ws}\) metric to annotate samples as either single clones or multiple clone infections. We use this for a quick proxy for multiple infections, before assessing this in more detail using moimix. 3. Estimate the MAFs directly from read-count data and select variants that are polymorphic in that region.
We also save the PED and MAP file as an R objects to speed up write-out.
# get central africa IDs
centr_afr <- seqGetData(mgen, "sample.id")[seqGetData(mgen, "sample.annotation/region") == "Central Africa"]
# filter gds
seqSetFilter(mgen, sample.id = centr_afr, variant.id = variant_filter)
# estimate MAF
maf_ca <- getMAF(mgen)
# remove variants that are not polymorphic at all in this region (MAF == 0)
snp_ca <- variant_filter[maf_ca > 0]
hist(maf_ca[snp_ca], breaks = 100)
seqSetFilter(mgen, sample.id = centr_afr, variant.id = snp_ca)
# estimate Fws metric
fws_central_africa <- getFws(mgen)
moi_ca <- data.frame(sample = centr_afr, fws = fws_central_africa,
moi = ifelse(fws_central_africa > 0.95, 1, 2))
hist(fws_central_africa, breaks = 100)
plink_ca <- extractPED(mgen, moi.estimates = moi_ca$moi)
write_rds(plink_ca, "processed_data/central_africa.rds")
seqSetFilter(mgen)
# get central africa IDs
west_afr <- seqGetData(mgen, "sample.id")[seqGetData(mgen, "sample.annotation/region") == "West Africa"]
# filter gds
seqSetFilter(mgen, sample.id = west_afr, variant.id = variant_filter)
# estimate MAF
maf_wa <- getMAF(mgen)
# remove variants that are not polymorphic at all in this region (MAF == 0)
snp_wa <- variant_filter[maf_ca > 0]
hist(maf_wa[snp_wa], breaks = 100)
seqSetFilter(mgen, sample.id = west_afr, variant.id = snp_wa)
# estimate Fws metric
fws_west_africa <- getFws(mgen)
moi_wa <- data.frame(sample = west_afr, fws = fws_west_africa,
moi = ifelse(fws_west_africa > 0.95, 1, 2))
hist(fws_west_africa, breaks = 100)
plink_wa <- extractPED(mgen, moi.estimates = moi_wa$moi)
write_rds(plink_wa, "processed_data/west_africa.rds")
seqSetFilter(mgen)
# get central africa IDs
se_asia <- seqGetData(mgen, "sample.id")[seqGetData(mgen,
"sample.annotation/region") == "SE Asia"]
# filter gds
seqSetFilter(mgen, sample.id = se_asia, variant.id = variant_filter)
# estimate MAF
maf_sea <- getMAF(mgen)
# remove variants that are not polymorphic at all in this region (MAF == 0)
snp_sea <- variant_filter[maf_sea > 0]
hist(maf_sea[snp_sea], breaks = 100)
seqSetFilter(mgen, sample.id = se_asia, variant.id = snp_sea)
# estimate Fws metric
fws_se_asia <- getFws(mgen)
moi_sea <- data.frame(sample = se_asia, fws = fws_se_asia,
moi = ifelse(fws_se_asia > 0.95, 1, 2))
hist(fws_se_asia, breaks = 100)
plink_sea <- extractPED(mgen, moi.estimates = moi_sea$moi)
write_rds(plink_sea, "processed_data/se_asia.rds")
seqSetFilter(mgen)
We also save the variant annotations across all populations.
seqSetFilter(mgen, variant.id = variant_filter)
annotations <- data.frame(chr = seqGetData(mgen, "chromosome"),
pos = seqGetData(mgen, "position"),
gene_name = seqGetData(mgen, "annotation/info/SNPEFF_GENE_NAME"),
gene_biotype = seqGetData(mgen, "annotation/info/SNPEFF_GENE_BIOTYPE"),
snp_effect = seqGetData(mgen, "annotation/info/SNPEFF_AMINO_ACID_CHANGE"),
snp_functional_class = seqGetData(mgen, "annotation/info/SNPEFF_EFFECT"),
snp_aa_change = seqGetData(mgen, "annotation/info/SNPEFF_AMINO_ACID_CHANGE"),
snp_codon_change = seqGetData(mgen, "annotation/info/SNPEFF_CODON_CHANGE"),
exon_id = seqGetData(mgen, "annotation/info/SNPEFF_EXON_ID"),
transcript_id = seqGetData(mgen, "annotation/info/SNPEFF_TRANSCRIPT_ID"))
# counts
knitr::kable(annotations %>%
count(snp_functional_class, gene_biotype))
write_rds(annotations, "processed_data/snp_annotations_clean.rds")
To construct polymorphic markers for the known mixture strains we extract a set of high MAF and high heterzygous set of global markers from the global field isolate sets.
seqSetFilter(mgen, sample.id = samples_to_keep, variant.id = variant_filter)
## # of selected samples: 2,039
## # of selected variants: 94,106
maf_all <- getMAF(mgen)
het_all <- getHeterozygosity(mgen)
plot(ecdf(maf_all))
plot(ecdf(het_all))
We see that as observed in the other populations the MAF distribution is biased to rare alleles. If we choose SNVs that are heterozygous, this should give us a cleaner signal for detecting MOI.
mix_inform_snps <- variant_filter[maf_all > 0.01 & het_all > 0.01]
# read in mixture metadata
mixture <- read_rds("processed_data/mixtures_all.rds")
seqSetFilter(mgen, variant.id = mix_inform_snps, sample.id = mixture$sample)
## # of selected samples: 32
## # of selected variants: 923
We also plot the BAF frequencies to see if the signal remains in this reduced variant set.
bf <- bafMatrix(mgen)
for(sample in mixture$sample) {
plot(bf, sample)
}
Finally, we export the mixture data to a GDS file and make it available to moimix.
seqExport(mgen, "processed_data/mixtures_clean.gds")
## Export to 'processed_data/mixtures_clean.gds'
## sample.id (32) [md5: de9681146247db2f63aa94d6ec8e4de0]
## variant.id (923) [md5: cdc7ad921a8b912e8ae12e9c7e62e463]
## position [md5: efb2423dff4ced87d14f0453e9019a93]
## chromosome [md5: cab40fede2ecb3de0ccce44f4a42a461]
## allele [md5: 9e7a22bc207db5e5af080de4a6210b48]
## genotype [md5: c62f6c0aa6dfe09916663aaa0763057b]
## annotation/id [md5: 59c671de74c771a9d00a1c72b115e54f]
## annotation/qual [md5: 198fa98be260eccdaf77837a4aafc9e1]
## annotation/filter [md5: ad560616ce33f9fea39d2434ef4c046b]
## annotation/info/AC [md5: db558ad5f5777076a2fab7e5966bf61e]
## annotation/info/AF [md5: 225ad0aaf763ed8991a30144ffa4cd72]
## annotation/info/AN [md5: 258610dc03fe4aba632bf8ebb6865fad]
## annotation/info/BaseQRankSum [md5: 3ce8e11d042949865ebaf6a413031edc]
## annotation/info/ClippingRankSum [md5: eb11f7dc0390aeb392ca04b49f619d08]
## annotation/info/DP [md5: 5468b6f8ac54ffa7017e1a3cb2c8e747]
## annotation/info/FS [md5: bd2eedfa6643ef0d06f54cda4c1f4aae]
## annotation/info/InbreedingCoeff [md5: 0627d6c4b90dec4b83445596502116ed]
## annotation/info/MQ [md5: 391cda483f7568129ea004ff301b5bcd]
## annotation/info/MQRankSum [md5: b4ac8ea50abb453a296e736bded50d2f]
## annotation/info/QD [md5: 131a085f292ee6bd2a13ecd89029b4df]
## annotation/info/ReadPosRankSum [md5: 368b483ff3744d47af0dee376f34706a]
## annotation/info/SNPEFF_AMINO_ACID_CHANGE [md5: 64f80c0ee511ff53e1879a6da9c38f34]
## annotation/info/SNPEFF_CODON_CHANGE [md5: e6b4326d1ddf8fcd8547a306c28122e3]
## annotation/info/SNPEFF_EFFECT [md5: fc2928d27e6358971bd76e966d57d966]
## annotation/info/SNPEFF_EXON_ID [md5: cdb4153137842d7b06b86bd9eece0da8]
## annotation/info/SNPEFF_FUNCTIONAL_CLASS [md5: af7a017b50dad6ab57011a61134bc50e]
## annotation/info/SNPEFF_GENE_BIOTYPE [md5: b126187cf6bb7771c75ef003e242c284]
## annotation/info/SNPEFF_GENE_NAME [md5: 51255627db312e97c5957e860454919b]
## annotation/info/SNPEFF_IMPACT [md5: b520bb2d4de217666c498ecab2648ef4]
## annotation/info/SNPEFF_TRANSCRIPT_ID [md5: 70687a2fe931204a2718b28d71155a34]
## annotation/info/SOR [md5: fe205d7a0ad29b30ef530337925c6c07]
## annotation/info/VQSLOD [md5: 2867208a220548207a1a11b581d54ee2]
## annotation/format/AD [md5: 37788fb3a8a0cb0b4e35bf0fd65d234d]
## annotation/format/DP [md5: 42f5779920a1a5f5d79cd783dde17ee1]
## annotation/format/GQ [md5: 1e426bbc26a663edd4f5a53255828b46]
## annotation/format/PGT [md5: fca1c4985e8ea96a1886d1077626f623]
## annotation/format/PID [md5: a04b1954c6c2be792e60972e216abdde]
## annotation/format/PL [md5: 6002a503e1f9d69def8917caa279a175]
## sample.annotation/acc [md5: beac0a46021a580ebac39d24a8d25872]
## sample.annotation/region [md5: 70bc8f4b72a86921468bf8e8441dce51]
## sample.annotation/country [md5: 70bc8f4b72a86921468bf8e8441dce51]
## sample.annotation/site [md5: 70bc8f4b72a86921468bf8e8441dce51]
## Done.
## Optimize the access efficiency ...
## Clean up the fragments of GDS file:
## open the file 'processed_data/mixtures_clean.gds' (260.9K)
## # of fragments: 277
## save to 'processed_data/mixtures_clean.gds.tmp'
## rename 'processed_data/mixtures_clean.gds.tmp' (259.3K, reduced: 1.6K)
## # of fragments: 141
# tidy up gds
seqSetFilter(mgen)
## # of selected samples: 2,640
## # of selected variants: 1,057,870
seqClose(mgen)
# cleanup.gds("processed_data/malaria_gen5.gds")
sessionInfo()
## R version 3.3.0 (2016-05-03)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.9.5 (Mavericks)
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggplot2_2.1.0 readr_0.2.2 moimix_0.0.1.9001 flexmix_2.3-13
## [5] lattice_0.20-33 dplyr_0.4.3 SeqArray_1.12.3 gdsfmt_1.8.2
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.5 Rsamtools_1.24.0
## [3] Biostrings_2.40.0 assertthat_0.1
## [5] digest_0.6.9 R6_2.1.2
## [7] GenomeInfoDb_1.8.1 plyr_1.8.4
## [9] MatrixModels_0.4-1 stats4_3.3.0
## [11] RSQLite_1.0.0 evaluate_0.9
## [13] coda_0.18-1 httr_1.1.0
## [15] highr_0.6 zlibbioc_1.18.0
## [17] GenomicFeatures_1.24.2 lazyeval_0.1.10
## [19] curl_0.9.7 SparseM_1.7
## [21] S4Vectors_0.10.0 Matrix_1.2-6
## [23] rmarkdown_0.9.6 labeling_0.3
## [25] devtools_1.11.1 BiocParallel_1.6.2
## [27] stringr_1.0.0 GWASExactHW_1.01
## [29] RCurl_1.95-4.8 biomaRt_2.28.0
## [31] munsell_0.4.3 rtracklayer_1.32.0
## [33] BiocGenerics_0.18.0 mcmc_0.9-4
## [35] htmltools_0.3.5 nnet_7.3-12
## [37] SummarizedExperiment_1.2.2 IRanges_2.6.0
## [39] XML_3.98-1.4 crayon_1.3.1
## [41] withr_1.0.1 GenomicAlignments_1.8.0
## [43] MASS_7.3-45 bitops_1.0-6
## [45] gtable_0.2.0 DBI_0.4-1
## [47] git2r_0.15.0 magrittr_1.5
## [49] formatR_1.4 SeqVarTools_1.10.0
## [51] scales_0.4.0 stringi_1.0-1
## [53] XVector_0.12.0 RColorBrewer_1.1-2
## [55] tools_3.3.0 BSgenome_1.40.0
## [57] Biobase_2.32.0 parallel_3.3.0
## [59] yaml_2.1.13 AnnotationDbi_1.34.2
## [61] colorspace_1.2-6 GenomicRanges_1.24.0
## [63] memoise_1.0.0 logistf_1.21
## [65] knitr_1.13 VariantAnnotation_1.18.1
## [67] quantreg_5.26 MCMCpack_1.3-6
## [69] modeltools_0.2-21